# Global knitr chunk options: echo the code, hide warnings and messages,
# fix the figure size, and emit chunk results as-is.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.width = 10,
  fig.height = 10,
  results = "asis"
)
# NOTE(review): a console width of 12 characters is unusually narrow --
# confirm this is intended (it may be a typo for 120).
options(width = 12)
## Load required packages
library(dplyr)
library(tibble)
library(DT)
library(data.table)
library(ggplot2)
# Default theme for every plot that does not set its own: black-and-white,
# with zero spacing between facet panels.
theme_set(
  theme_bw() + theme(panel.spacing = grid::unit(0, "lines"))
)
library(scales)
library(gifski)
library(gganimate)
## Source helper functions from funs dir
source("funs/globalFuns.R")
This dataset contains five-year survival rates for patients with various cancer types in the US from \(1963\) to \(2013\). The data were published by the US National Cancer Institute and downloaded from data.world.
## Create the output directory (helper sourced from funs/globalFuns.R)
createDir(dirname = "HW6", dirpath = ".")
## Load/download the data through the downloadDf() helper
url <- "https://query.data.world/s/idhkwmkwynt4n4jlgmkslf43c5pd24"
raw_df <- downloadDf(
  filename = "cancer_survival_rates_usa",
  filetype = "csv",
  df_url = url
)
Reading dataset from your computer… cancer_survival_rates_usa.csv dataset already saved!!!
Variables
# Summarise every variable, treating Year as a factor, then relabel the
# "Summary" column with a description of its contents before displaying
# the result as an interactive table.
df_summary <- raw_df %>%
  mutate(Year = as.factor(Year)) %>%
  summarizeDf() %>%
  setnames("Summary", "Summary ([min, max]; mean (sd) / label(%))")
datatable(df_summary, rownames = FALSE)
Cleaning
# Base data for the overall survival plot: drop rows without a survival rate
# and normalise column names (dots become underscores, everything lower case).
raw_df <- raw_df %>%
  filter(!is.na(Survival.Rate)) %>%
  setnames(names(.), tolower(gsub("\\.", "_", names(.))))
# Data for the race/gender plot: exclude rows whose race starts with "All",
# whose cancer type contains "All", or whose gender contains "total".
working_df <- raw_df %>%
  filter(
    !grepl("^All", race),
    !grepl("All", cancer_type),
    !grepl("total", gender)
  )
Overall Cancer Survival
# Data for the overall plot: keep rows whose race starts with "All" and whose
# gender contains "total", excluding cancer types that start with "All".
# Cancer types are then ranked within each year by descending survival rate
# (min_rank() gives tied rates the same rank).
overall_df <- raw_df %>%
  filter(
    grepl("^All", race),
    grepl("total", gender),
    !grepl("^All", cancer_type)
  ) %>%
  group_by(year) %>%
  mutate(ranking = min_rank(-survival_rate)) %>%
  # Drop the grouping once the per-year ranking is computed so that later
  # operations on overall_df are not silently applied per year.
  ungroup()
# Animated horizontal bar chart of survival rate per cancer type, with one
# animation state per year and bars positioned by their within-year ranking.
plot1 <- ggplot(
  overall_df,
  aes(ranking, group = cancer_type, colour = cancer_type, fill = cancer_type)
) +
  # Bars are tiles centred at half the rate, so each one spans 0..rate.
  geom_tile(
    aes(y = survival_rate / 2, height = survival_rate, width = 0.9),
    colour = NA
  ) +
  # Cancer type label, right-aligned against the start of each bar.
  geom_text(aes(y = 0, label = paste0(cancer_type, " ")), vjust = 0.2, hjust = 1) +
  # Percentage label, left-aligned at the end of each bar. hjust is a constant,
  # so it is set as a fixed parameter rather than inside aes().
  geom_text(
    aes(y = survival_rate, label = paste0(" ", percent(survival_rate))),
    hjust = 0
  ) +
  # clip = "off" lets the labels extend beyond the panel into the plot margin.
  coord_flip(clip = "off", expand = TRUE) +
  scale_x_reverse() +
  scale_color_viridis_d(name = "") +
  scale_fill_viridis_d(name = "") +
  # "none" replaces the deprecated `FALSE` for hiding legends.
  guides(colour = "none", fill = "none") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", colour = "grey"),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    # Wide left margin (4 cm) leaves room for the cancer type labels.
    plot.margin = margin(1, 1, 1, 4, "cm")
  ) +
  transition_states(states = year, transition_length = 4, state_length = 1) +
  ease_aes("cubic-in-out") +
  labs(
    title = "Cancer survival rate per Year : {closest_state}",
    x = "",
    y = "Survival rate"
  )
# Render the animation once and keep the result, so the GIF written to disk
# uses exactly these settings. Passing the unrendered plot to anim_save()
# would make it call animate() again with default settings, rendering twice
# and discarding the frame rate, size and end pause chosen here.
anim1 <- animate(
  plot1,
  nframes = 100,
  fps = 20,
  width = 950,
  height = 750,
  end_pause = 10
)
# Print explicitly so the animation is still shown in the rendered document.
anim1
anim_save("csurvival_overall.gif", animation = anim1)
Cancer Survival by Gender and Race
# Survival rate over time, one line per cancer type, faceted by gender (rows)
# and race (columns), and revealed progressively along the year axis.
plot2 <- ggplot(
  working_df,
  aes(x = year, y = survival_rate, colour = cancer_type, group = cancer_type)
) +
  geom_line() +
  geom_point() +
  # Label each series with its cancer type instead of using a legend.
  geom_text(aes(y = survival_rate, label = cancer_type)) +
  scale_x_continuous(breaks = seq(from = 1963, to = 2013, by = 10)) +
  scale_y_continuous(labels = percent) +
  scale_color_viridis_d(name = "") +
  facet_grid(gender ~ race) +
  theme(legend.position = "none") +
  transition_reveal(year)
# Render the animation once and keep the result, so the GIF written to disk
# uses exactly these settings. Passing the unrendered plot to anim_save()
# would make it call animate() again with default settings, rendering twice
# and discarding the frame rate, size and end pause chosen here.
anim2 <- animate(
  plot2,
  nframes = 100,
  fps = 20,
  width = 950,
  height = 750,
  end_pause = 10
)
# Print explicitly so the animation is still shown in the rendered document.
anim2
anim_save("csurvival_racegender.gif", animation = anim2)